import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import folium # for leaflet maps
from IPython.display import HTML
# Build an HTML snippet with a "show/unshow codes" button. The embedded script
# runs code_toggle once the document is ready (hiding every `div.input`
# element, because code_show starts out true) and the form button calls
# code_toggle again to flip the visibility on each click.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click to show/unshow codes."></form>''')
# Here we use the public COVID-19 data provided by Johns Hopkins University (JHU). The data and visualizations are updated daily. Source: https://github.com/CSSEGISandData/COVID-19
# Read the cleaned case table; 'Date' is parsed to datetime so it can be
# compared, sorted and formatted by the cells below.
full_table = pd.read_csv('covid_19_clean_complete.csv', parse_dates=['Date'])

# Fold the 'China' label into 'Mainland China'. The result is assigned back to
# the column instead of calling replace(inplace=True) on the selected column:
# the in-place form operates on an intermediate object and is not guaranteed
# to modify full_table (it has no effect under pandas Copy-on-Write).
full_table['Country/Region'] = full_table['Country/Region'].replace(
    {'China': 'Mainland China'}
)
countries = full_table['Country/Region'].unique().tolist()

# Rows without a province get a '--' placeholder so the column never holds NaN.
full_table['Province/State'] = full_table['Province/State'].fillna('--')

print("\nTotal countries affected by CoVID-19 thus far: ", len(countries))
# Separate the Diamond Princess cruise ship records from the main table.
on_ship = full_table['Province/State'] == 'Diamond Princess cruise ship'
ship = full_table[on_ship]
full_table = full_table[~on_ship]

# Partition what is left into Mainland China and the rest of the world ("row").
in_china = full_table['Country/Region'] == 'Mainland China'
china = full_table[in_china]
row = full_table[~in_china]
# Latest (cumulative) numbers: keep only the rows of the most recent date.
# Series.max() is used instead of the builtin max() so the reduction stays
# vectorised.
full_latest = full_table[full_table['Date'] == full_table['Date'].max()].reset_index()
china_latest = full_latest[full_latest['Country/Region'] == 'Mainland China']
row_latest = full_latest[full_latest['Country/Region'] != 'Mainland China']

# Totals per country / per Chinese province on the latest date. The columns
# are selected with a list: selecting several groupby columns with a bare
# tuple was deprecated in pandas 1.0 and is an error in pandas 2.x.
full_latest_grouped = (
    full_latest.groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
china_latest_grouped = (
    china_latest.groupby('Province/State')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
row_latest_grouped = (
    row_latest.groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
# Global totals per day, most recent day first. The case columns are selected
# with a list because tuple selection on a groupby fails on pandas 2.x.
all_cases = (
    full_table.groupby('Date')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
    .sort_values('Date', ascending=False)
)
all_cases.head(5).style.background_gradient(cmap='Pastel1')
# Breakdown by country and province on the latest date; the group-by keys sort
# the result alphabetically. List selection replaces the tuple form, which
# pandas 2.x rejects.
temp = full_latest.groupby(['Country/Region', 'Province/State'])[
    ['Confirmed', 'Deaths', 'Recovered']
].max()
temp.style.background_gradient(cmap='Pastel1_r')
# Per-country totals ranked by confirmed cases, largest first.
temp_f = (
    full_latest_grouped[['Country/Region', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp_f.style.background_gradient(cmap='Pastel1_r')
# Per-province totals for China ranked by confirmed cases, largest first.
temp_f = (
    china_latest_grouped[['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp_f.style.background_gradient(cmap='Pastel1_r')
# For the JHU dashboard, see https://www.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6
# Stacked horizontal bars of the latest case counts per country, excluding
# Mainland China. Columns are selected with a list because tuple selection on
# a groupby is an error in pandas 2.x.
rl = (
    row_latest.groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)

ncl = rl.copy()
# 'Affected' = confirmed cases that are neither dead nor recovered.
ncl['Affected'] = ncl['Confirmed'] - ncl['Deaths'] - ncl['Recovered']
# Long format: one row per (country, case type) so plotly can stack by 'variable'.
ncl = ncl.melt(id_vars="Country/Region", value_vars=['Affected', 'Recovered', 'Deaths'])

fig = px.bar(ncl.sort_values(['variable', 'value']),
             x="value", y="Country/Region", color='variable', orientation='h', height=800,
             title='Number of cases (excluding China)')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
# ------------------------------------------
# Stacked vertical bars of the latest case counts per Chinese province.
# Columns are selected with a list because tuple selection on a groupby is an
# error in pandas 2.x.
cl = (
    china_latest.groupby('Province/State')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)

ncl = cl.copy()
# 'Affected' = confirmed cases that are neither dead nor recovered.
ncl['Affected'] = ncl['Confirmed'] - ncl['Deaths'] - ncl['Recovered']
# Long format: one row per (province, case type) so plotly can stack by 'variable'.
ncl = ncl.melt(id_vars="Province/State", value_vars=['Affected', 'Recovered', 'Deaths'])

fig = px.bar(ncl.sort_values(['variable', 'value']),
             y="value", x="Province/State", color='variable', orientation='v', height=800,
             title='Number of cases in China')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
# Map of reported cases worldwide: one circle marker per row of the latest
# snapshot, with a tooltip listing the location and its case counts.
m = folium.Map(location=[10, -20], tiles='openstreetmap',
               min_zoom=1, max_zoom=4, zoom_start=2.3)

# Iterate over the rows once instead of indexing with .iloc[i] for every field.
for _, rec in full_latest.iterrows():
    tooltip_html = (
        f"<li><bold>Country : {rec['Country/Region']!s}"
        f"<li><bold>Province : {rec['Province/State']!s}"
        f"<li><bold>Confirmed : {rec['Confirmed']!s}"
        f"<li><bold>Deaths : {rec['Deaths']!s}"
        f"<li><bold>Recovered : {rec['Recovered']!s}"
    )
    folium.CircleMarker(
        location=[rec['Lat'], rec['Long']],
        color='crimson',
        tooltip=tooltip_html,
        # Fifth root keeps the radii of large and small outbreaks comparable.
        radius=int(rec['Confirmed'] ** 0.2 + 1),
        fill_color='red',
        fill_opacity=0.7,
    ).add_to(m)

m
# Animated bubble map of confirmed cases outside China, one frame per date.
# Columns are selected with a list because tuple selection on a groupby is an
# error in pandas 2.x.
formated_gdf = (
    row.groupby(['Date', 'Country/Region'])[['Confirmed', 'Deaths', 'Recovered']]
    .max()
    .reset_index()
)
# Exclude China under either label in a single filter; .copy() makes the
# column assignment below act on an independent frame rather than on a slice.
formated_gdf = formated_gdf[
    ~formated_gdf['Country/Region'].isin(['China', 'Mainland China'])
].copy()
# Plotly uses the formatted date string as the label of each animation frame.
formated_gdf['Date'] = pd.to_datetime(formated_gdf['Date']).dt.strftime('%m/%d/%Y')

fig = px.scatter_geo(formated_gdf,
                     locations="Country/Region", locationmode='country names',
                     color="Confirmed", size='Confirmed', hover_name="Country/Region",
                     range_color=[0, formated_gdf['Confirmed'].max() + 2],
                     projection="natural earth", animation_frame="Date",
                     title='Spread outside China over time')
fig.update(layout_coloraxis_showscale=False)
fig.show()
# -----------------------------------------------------------------------------------
# Animated bubble map of cases per Chinese province, one frame per date.
# Columns are selected with a list because tuple selection on a groupby is an
# error in pandas 2.x.
china_map = (
    china.groupby(['Date', 'Province/State'])[
        ['Confirmed', 'Deaths', 'Recovered', 'Lat', 'Long']
    ]
    .max()
    .reset_index()
)
# Square root of the confirmed count drives both marker size and colour.
china_map['size'] = china_map['Confirmed'].pow(0.5)
# Plotly uses the formatted date string as the label of each animation frame.
china_map['Date'] = pd.to_datetime(china_map['Date']).dt.strftime('%m/%d/%Y')
china_map.head()

fig = px.scatter_geo(china_map, lat='Lat', lon='Long', scope='asia',
                     color="size", size='size', hover_name='Province/State',
                     hover_data=['Confirmed', 'Deaths', 'Recovered'],
                     projection="natural earth", animation_frame="Date",
                     title='Spread in China over time')
fig.update(layout_coloraxis_showscale=False)
fig.show()
# Countries ranked by deaths, keeping only those with at least one death.
temp_flg = (
    full_latest_grouped[['Country/Region', 'Deaths']]
    .sort_values(by='Deaths', ascending=False)
    .reset_index(drop=True)
    .loc[lambda frame: frame['Deaths'] > 0]
)
temp_flg.style.background_gradient(cmap='Pastel1_r')
# Countries outside China where nobody has recovered yet, ranked by confirmed cases.
none_recovered = row_latest_grouped['Recovered'] == 0
temp = (
    row_latest_grouped[none_recovered][['Country/Region', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Pastel1_r')
# Countries outside China where every confirmed case has ended in death or recovery.
closed_cases = row_latest_grouped['Deaths'] + row_latest_grouped['Recovered']
temp = (
    row_latest_grouped[row_latest_grouped['Confirmed'] == closed_cases]
    [['Country/Region', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')
# Chinese provinces where every confirmed case has ended in death or recovery.
closed_cases = china_latest_grouped['Deaths'] + china_latest_grouped['Recovered']
temp = (
    china_latest_grouped[china_latest_grouped['Confirmed'] == closed_cases]
    [['Province/State', 'Confirmed', 'Deaths', 'Recovered']]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')
# Reference: https://www.princess.com/news/notices_and_advisories/notices/diamond-princess-update.html
# Most recent record for the Diamond Princess cruise ship.
temp = (
    ship.sort_values(by='Date', ascending=False)
    .head(1)
    [['Province/State', 'Date', 'Confirmed', 'Deaths', 'Recovered']]
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='rainbow')
def location(row):
    """Classify a record as Hubei, another Chinese province, or the rest of the world.

    Args:
        row: Mapping-like record (for example a DataFrame row) indexed by
            'Country/Region' and, for Mainland China records,
            'Province/State'.

    Returns:
        'Hubei', 'Other Chinese Provinces' or 'Rest of the World'.
    """
    # Guard clause: the province only matters for Mainland China records.
    if row['Country/Region'] != 'Mainland China':
        return 'Rest of the World'
    if row['Province/State'] == 'Hubei':
        return 'Hubei'
    return 'Other Chinese Provinces'
# Grouped bars comparing Hubei, the other Chinese provinces and the rest of
# the world on the latest date.
temp = full_latest.copy()
temp['Region'] = temp.apply(location, axis=1)
# List selection: tuple selection on a groupby is an error in pandas 2.x.
temp = temp.groupby('Region')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
# Long format (Region, Case, Count), sorted by count.
temp = temp.melt(id_vars='Region', value_vars=['Confirmed', 'Deaths', 'Recovered'],
                 var_name='Case', value_name='Count').sort_values('Count')

fig = px.bar(temp, y='Region', x='Count', color='Case', barmode='group', orientation='h',
             text='Count', title='Hubei - Rest of China - Rest of the World',
             color_discrete_sequence=['#EF553B', '#00CC96', '#636EFA'])
fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
# Per-date, per-country case counts; gdf is reused by the following cell.
# List selection: tuple selection on a groupby is an error in pandas 2.x.
gdf = (
    full_table.groupby(['Date', 'Country/Region'])[['Confirmed', 'Deaths', 'Recovered']]
    .max()
    .reset_index()
)

# One facet per case type for Mainland China.
temp = gdf[gdf['Country/Region'] == 'Mainland China'].reset_index()
temp = temp.melt(id_vars='Date', value_vars=['Confirmed', 'Deaths', 'Recovered'],
                 var_name='Case', value_name='Count')
fig = px.bar(temp, x="Date", y="Count", color='Case', facet_col="Case",
             title='Cases in China')
fig.show()
# Daily totals for every country except Mainland China, one facet per case type.
# Only the numeric case columns are summed; an unrestricted sum() would also
# try to aggregate the string column 'Country/Region'.
temp = (
    gdf[gdf['Country/Region'] != 'Mainland China']
    .groupby('Date')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
temp = temp.melt(id_vars='Date', value_vars=['Confirmed', 'Deaths', 'Recovered'],
                 var_name='Case', value_name='Count')
fig = px.bar(temp, x="Date", y="Count", color='Case', facet_col="Case",
             title='Cases in rest of the World')
fig.show()
# Note that China has 31 province-level administrative divisions (省级行政区) under direct jurisdiction, excluding Hong Kong and Macau. See https://en.wikipedia.org/wiki/Provinces_of_China
# Number of distinct Chinese provinces reporting at least one confirmed case, per date.
affected_china = china[china['Confirmed'] != 0]
c_spread = (
    affected_china.groupby('Date')['Province/State']
    .unique()
    .apply(len)
    .reset_index()
)
fig = px.line(c_spread, x='Date', y='Province/State',
              title='Number of provinces in China affected over time')
fig.show()
# ------------------------------------------------------------------------------------------
# Number of distinct countries reporting at least one confirmed case, per date.
affected_world = full_table[full_table['Confirmed'] != 0]
spread = (
    affected_world.groupby('Date')['Country/Region']
    .unique()
    .apply(len)
    .reset_index()
)
fig = px.line(spread, x='Date', y='Country/Region',
              title='Number of countries affected over time')
fig.show()
# Global daily totals. Only the case columns are summed: an unrestricted sum()
# would also aggregate the text columns and the coordinates.
temp = full_table.groupby('Date')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
temp.head()

# Death and recovery percentages of confirmed cases. Scale to percent first
# and round afterwards, so the stored values are clean one-decimal numbers
# rather than products such as 2.1999999999999997.
temp['% death'] = (temp['Deaths'] / temp['Confirmed'] * 100).round(1)
temp['% recovered'] = (temp['Recovered'] / temp['Confirmed'] * 100).round(1)

# Long format (Date, Ratio, Value) so each ratio becomes its own line.
temp = temp.melt(id_vars='Date',
                 value_vars=['% death', '% recovered'],
                 var_name='Ratio',
                 value_name='Value')

fig = px.line(temp, x="Date", y="Value", color='Ratio',
              title='Recovery and Mortality Rate over Time')
fig.show()
# One treemap per case type for the Chinese provinces, each drawn from the
# latest snapshot sorted by that case type in descending order.
for metric, chart_title in (
    ('Confirmed', 'Number of Confirmed Cases in Chinese Provinces'),
    ('Deaths', 'Number of Deaths Reported in Chinese Provinces'),
    ('Recovered', 'Number of Recovered Cases in Chinese Provinces'),
):
    ranked = china_latest.sort_values(by=metric, ascending=False).reset_index(drop=True)
    fig = px.treemap(ranked,
                     path=["Province/State"], values=metric,
                     title=chart_title,
                     color_discrete_sequence=px.colors.qualitative.Prism)
    fig.show()
# One treemap per case type for the countries outside Mainland China.
for metric, chart_title in (
    ('Confirmed', '# confirmed (excluding Mainland China)'),
    ('Deaths', '# deaths (excluding Mainland China)'),
    ('Recovered', '# recovered (excluding Mainland China)'),
):
    fig = px.treemap(row_latest, path=["Country/Region"], values=metric,
                     title=chart_title,
                     color_discrete_sequence=px.colors.qualitative.Pastel)
    fig.show()
# Daily new cases per country: day-over-day difference of the cumulative
# totals. The diff is taken within each country, so the first date of every
# country is NaN instead of a difference against the previous country's last
# row. List selection replaces the tuple form, which pandas 2.x rejects.
temp = (
    full_table.groupby(['Country/Region', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .groupby(level='Country/Region')
    .diff()
    .reset_index()
)

fig = px.bar(temp, x="Date", y="Confirmed", color='Country/Region',
             title='Number of new cases reported globally everyday')
fig.show()

# Same data without Mainland China.
temp_row = temp[temp['Country/Region'] != 'Mainland China']

fig = px.bar(temp_row, x="Date", y="Confirmed", color='Country/Region',
             title='Number of daily new cases (excluding China)')
fig.show()

fig = px.bar(temp_row, x="Date", y="Deaths", color='Country/Region',
             title='Number of daily new death cases (excluding China)')
fig.show()
# Reference figures for recent epidemics, one record per epidemic:
# (name, start year, end year, confirmed cases, deaths).
epidemics = pd.DataFrame(
    data=[
        ('COVID-19', 2019, 2020, 80000, 2750),
        ('SARS', 2003, 2004, 8096, 774),
        ('EBOLA', 2014, 2016, 28646, 11323),
        ('MERS', 2012, 2017, 2494, 858),
        ('H1N1', 2009, 2010, 6724149, 19654),
    ],
    columns=['epidemic', 'start_year', 'end_year', 'confirmed', 'deaths'],
)

# Deaths as a percentage of confirmed cases, rounded to two decimals.
death_share = epidemics['deaths'] / epidemics['confirmed']
epidemics['% mortality'] = (death_share * 100).round(2)
epidemics
# Faceted bars comparing the epidemics on confirmed cases, deaths and mortality.
facet_columns = ['confirmed', 'deaths', '% mortality']
temp = epidemics.melt(id_vars='epidemic', value_vars=facet_columns,
                      var_name='Case', value_name='Value')

fig = px.bar(temp, x="epidemic", y="Value", color='epidemic', text='Value', facet_col="Case",
             color_discrete_sequence=px.colors.qualitative.Bold)
fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_yaxes(showticklabels=False)
# Unlink the y axes of the second and third facets from the first one.
for facet_axis in (fig.layout.yaxis2, fig.layout.yaxis3):
    facet_axis.update(matches=None)
fig.show()
# Cumulative confirmed COVID-19 cases per date against the final case counts
# of earlier epidemics, drawn as flat dashed reference lines. H1N1 is left out
# of this chart (see the title), so no H1N1 series is built here.
temp = full_table.groupby('Date')['Confirmed'].sum().reset_index()

covid = temp['Confirmed']
n_dates = len(temp)
# One constant value per date so each epidemic plots as a horizontal line.
sars = [8096] * n_dates
ebola = [28646] * n_dates
mers = [2494] * n_dates

plt.style.use('fivethirtyeight')
plt.figure(figsize=(20, 8))
plt.plot(temp['Date'], covid, label='COVID-19 (2019-2020)', c='#555555', alpha=0.8)
plt.plot(temp['Date'], sars, label='SARS (2003-2004)', c='#E71D36', ls='--', alpha=0.8)
plt.plot(temp['Date'], ebola, label='EBOLA (2014-2016)', c='#FF9F1C', ls='--', alpha=0.8)
plt.plot(temp['Date'], mers, label='MERS (2012-2017)', c='#2EC4B6', ls='--', alpha=0.8)
plt.title('Number of cases (excluding H1N1)')
plt.legend()
plt.show()
# Cumulative COVID-19 deaths per date against the final death counts of
# earlier epidemics, drawn as flat dashed reference lines.
temp = full_table.groupby('Date')['Deaths'].sum().reset_index()

covid = temp['Deaths']
n_dates = len(temp)
# One constant value per date so each epidemic plots as a horizontal line.
sars = [774] * n_dates
ebola = [11323] * n_dates
mers = [858] * n_dates
h1n1 = [19654] * n_dates

plt.figure(figsize=(20, 8))
plt.plot(temp['Date'], covid, label='COVID-19 (2019-2020)', c='#555555', alpha=0.8)
plt.plot(temp['Date'], sars, label='SARS (2003-2004)', c='#E71D36', ls='--', alpha=0.8)
plt.plot(temp['Date'], ebola, label='EBOLA (2014-2016)', c='#FF9F1C', ls='--', alpha=0.8)
plt.plot(temp['Date'], mers, label='MERS (2012-2017)', c='#2EC4B6', ls='--', alpha=0.8)
plt.plot(temp['Date'], h1n1, label='H1N1 (2009-2010)', c='#2345BA', ls='--', alpha=0.8)
plt.title('Number of deaths')
plt.legend()
plt.show()